TLC Workshop

from IPython.display import YouTubeVideo
# Build an embedded YouTube player for video id 'J9MOMpYWpt8', sized 800x400.
YouTubeVideo('J9MOMpYWpt8', width=800, height=400)

Exploratory Data Analysis

Venue Frequency

Suburb level

# Horizontal bar chart of the ten most frequent suburbs, sorted ascending by count before plotting.
suburb_counts = venue_activity_complete['suburb_nopc'].value_counts()
top_ten_suburbs = suburb_counts.head(10).sort_values()
top_ten_suburbs.plot(kind="barh")
plt.show()
_images/Analysis_TLC_Workshop_10_0.png
# Restrict to rows whose suburb is among the ten most frequent suburbs,
# leaving out the 'Melbourne VIC' rows.
top_suburbs = venue_activity_complete['suburb_nopc'].value_counts().head(10).index
suburb_col = venue_activity_complete['suburb_nopc']
dfu = venue_activity_complete[suburb_col.isin(top_suburbs) & (suburb_col != 'Melbourne VIC')]

# Share of each remaining suburb within every decade; unstack turns the
# suburbs into columns so that each one becomes its own line.
dfu = dfu.groupby(['Decade'])['suburb_nopc'].value_counts(normalize=True).unstack()

# Line chart with the legend anchored outside the plotting area.
ax = dfu.plot.line(figsize=(7, 5), xlabel='Decade', ylabel='Prop', rot=0, marker='.')
ax.legend(title='', bbox_to_anchor=(1, 1), loc='upper left')
plt.show()
_images/Analysis_TLC_Workshop_11_0.png
# Restrict to rows whose suburb is among the six most frequent suburbs,
# leaving out the 'Melbourne VIC' rows.
top_suburbs = venue_activity_complete['suburb_nopc'].value_counts().head(6).index
suburb_col = venue_activity_complete['suburb_nopc']
dfu = venue_activity_complete[suburb_col.isin(top_suburbs) & (suburb_col != 'Melbourne VIC')]

# Raw counts per suburb within every decade; unstack turns the suburbs
# into columns so that each one becomes its own bar series.
dfu = dfu.groupby(['Decade'])['suburb_nopc'].value_counts().unstack()

# Grouped bar chart with the legend anchored outside the plotting area.
ax = dfu.plot.bar(figsize=(7, 5), xlabel='Decade', ylabel='Freq', rot=0)
ax.legend(title='', bbox_to_anchor=(1, 1), loc='upper left')
plt.show()
_images/Analysis_TLC_Workshop_12_0.png

North or South of the river?

# Pie chart of the relative share of each 'Side of River' value, labelled with one-decimal percentages.
side_share = venue_activity_complete['Side of River'].value_counts(normalize=True)
side_share.plot(kind='pie', autopct='%1.1f%%')
plt.show()
_images/Analysis_TLC_Workshop_14_0.png
# Share of each 'Side of River' value within every decade; unstacking
# puts the sides in columns, one line per side.
side_by_decade = venue_activity_complete.groupby(['Decade'])['Side of River']
dfu = side_by_decade.value_counts(normalize=True).unstack()

# Line chart with the legend anchored outside the plotting area.
ax = dfu.plot.line(figsize=(7, 5), xlabel='Decade', ylabel='Prop', rot=0, marker='.')
ax.legend(title='North or South River?', bbox_to_anchor=(1, 1), loc='upper left')
plt.show()
_images/Analysis_TLC_Workshop_15_0.png
# Raw counts of each 'Side of River' value within every decade, sides as columns.
side_by_decade = venue_activity_complete.groupby(['Decade'])['Side of River']
dfu = side_by_decade.value_counts().unstack()

# Grouped bar chart with the legend anchored outside the plotting area.
ax = dfu.plot.bar(figsize=(7, 5), xlabel='Decade', ylabel='Freq', rot=0)
ax.legend(title='North or South River?', bbox_to_anchor=(1, 1), loc='upper left')
plt.show()
_images/Analysis_TLC_Workshop_16_0.png

Top venues by decade

# Frequency table of venue occurrences by decade: count every
# (Venue, Decade) pair, then aggregate back to one row per pair.
venue_decade_counts = venue_activity_complete[['Venue', 'Decade']].value_counts().reset_index()
all_events = venue_decade_counts.groupby(['Venue', 'Decade']).sum().reset_index()

# Give the count column a readable name.
all_events.columns = ['Venue', 'Decade', 'Freq']

# Ten busiest venue/decade combinations.
all_events.sort_values('Freq', ascending=False).head(10)
Venue Decade Freq
117 Corner Hotel, Melbourne, Australia 2010 1081
116 Corner Hotel, Melbourne, Australia 2000 744
336 Northcote Social Club, Melbourne, Australia 2010 471
199 Forum Theatre, Melbourne, Australia 2010 445
432 Rod Laver Arena, Melbourne, Australia 2010 409
571 The Hi-Fi, Melbourne, Australia 2000 400
431 Rod Laver Arena, Melbourne, Australia 2000 355
355 Palais Theatre, Melbourne, Australia 2010 349
572 The Hi-Fi, Melbourne, Australia 2010 346
0 170 Russell, Melbourne, Australia 2010 319

1970s

# Venues with at least one recorded event in any decade.
top_venues = all_events.loc[all_events['Freq'] >= 1, 'Venue'].unique()
# Venues with more than 40 events in the 1970s decade row.
is_busy_1970s = (all_events['Decade'] == 1970) & (all_events['Freq'] > 40)
active_in_1970s = all_events.loc[is_busy_1970s, 'Venue'].unique()
top_venues_70s = list(set(top_venues) & set(active_in_1970s))

# Decade-level counts for those venues across all decades.
top_in_state = all_events[all_events['Venue'].isin(top_venues_70s)]
top_in_state = top_in_state.sort_values(['Decade', 'Venue'])

# One line per venue, decades on the x axis.
fig = px.line(
    top_in_state.sort_values('Decade'),
    x="Decade",
    y="Freq",
    color="Venue",
    line_group="Venue",
    title='<b>Music venues active in 1970s (Melbourne):</b> Decades',
    markers=True,
)

fig.show()
# Events per venue per year for the venues selected in `active_in_1970s`.
venue_rows = venue_activity_complete[venue_activity_complete['Venue'].isin(active_in_1970s)]
yearly_counts = venue_rows[['Venue', 'Year']].value_counts().reset_index()
all_events_yr = yearly_counts.groupby(['Venue', 'Year']).sum().reset_index()
all_events_yr.columns = ['Venue', 'Year', 'Freq']
all_events_yr['Year'] = all_events_yr['Year'].astype(int)

# Keep 1966-1979 inclusive.
all_events_yr = all_events_yr[all_events_yr['Year'].between(1966, 1979)]

# One line per venue, years on the x axis.
fig = px.line(
    all_events_yr.sort_values('Year'),
    x="Year",
    y="Freq",
    color="Venue",
    line_group="Venue",
    title='<b>Music venues most active in 1970s (Melbourne):</b> Year',
    markers=True,
)
fig.show()
# Every venue with a 1970s row in the decade table (no frequency threshold).
active_in_1970s = all_events[(all_events.Decade == 1970)]['Venue'].unique()

# Events per venue per year for those venues.
all_events_yr = venue_activity_complete[venue_activity_complete.Venue.isin(active_in_1970s)][['Venue','Year']]\
.value_counts().reset_index().groupby(['Venue','Year']).sum().reset_index()
all_events_yr.columns = ['Venue','Year','Freq']
all_events_yr.Year = all_events_yr.Year.astype(int)

# Keep 1974-1979 only, and only venues that reached more than 5 events in at
# least one year. The Freq > 5 test runs on the frame before the year filter,
# so the qualifying year may fall outside this window.
all_events_yr = all_events_yr[(all_events_yr.Year > 1973) & (all_events_yr.Year < 1980) &
                              (all_events_yr.Venue.isin(all_events_yr[all_events_yr.Freq > 5]['Venue'].unique()))]

# Venue x Year table of event counts; missing combinations become 0.
piv70s = all_events_yr.pivot(index='Venue', columns='Year', values='Freq').fillna(0)
# Clipping at 1 and summing counts the years with any activity per venue;
# keep the venues that were active in more than one year.
clipped = piv70s.clip(upper=1).sum(axis=1).reset_index()
piv70s = piv70s[piv70s.index.isin(clipped[clipped[0] > 1].Venue)]

# Year-over-year relative change, one column per consecutive pair of years
# ('74-75' ... '78-79'). Only 1974-1979 is covered because earlier years
# were filtered out above.
years_70s = list(range(1974, 1980))
change70s = piv70s.copy()
for prev_year, year in zip(years_70s, years_70s[1:]):
    label = f'{prev_year % 100:02d}-{year % 100:02d}'
    change70s[label] = change70s[[prev_year, year]].pct_change(axis=1)[year]

# Drop the raw counts and reshape to long form (columns: Year, Venue, 0).
change70s = change70s.drop(years_70s, axis=1).unstack().reset_index()
# Discard undefined or infinite changes (division by a zero count).
# The axis is passed by keyword: the positional form `.any(1)` was removed in pandas 2.0.
change70s = change70s[~change70s.isin([np.nan, np.inf, -np.inf]).any(axis=1)]
# Venues behind the five largest year-over-year increases.
biggestmovers70s = change70s.sort_values(0, ascending=False).head(5)['Venue'].unique()

# Heatmap of the raw yearly counts per venue.
fig, ax = plt.subplots(figsize=(10,10))
ax = sns.heatmap(piv70s, annot=True)
ax.set(xlabel="", ylabel="")
plt.show()
_images/Analysis_TLC_Workshop_22_0.png
# Venue x year-pair table of year-over-year change for the biggest movers;
# pairs dropped earlier as undefined or infinite are shown as 0.
forheatmap = change70s[change70s['Venue'].isin(biggestmovers70s)].pivot(index='Venue', columns='Year', values=0).fillna(0)
fig, ax = plt.subplots(figsize=(6,6))
ax = sns.heatmap(forheatmap, annot=True)
ax.set(xlabel="", ylabel="")
# Title typo fixed ("fot" -> "for").
plt.title('Largest YoY fluctuations for venues in the 70s')
plt.show()
_images/Analysis_TLC_Workshop_23_0.png
# Events per venue per year for the 1970s biggest movers.
mover_rows = venue_activity_complete[venue_activity_complete['Venue'].isin(biggestmovers70s)]
yearly_counts = mover_rows[['Venue', 'Year']].value_counts().reset_index()
all_events_yr = yearly_counts.groupby(['Venue', 'Year']).sum().reset_index()
all_events_yr.columns = ['Venue', 'Year', 'Freq']
all_events_yr['Year'] = all_events_yr['Year'].astype(int)

# Keep 1971-1984 inclusive.
all_events_yr = all_events_yr[all_events_yr['Year'].between(1971, 1984)]

# One line per venue, years on the x axis.
fig = px.line(
    all_events_yr.sort_values('Year'),
    x="Year",
    y="Freq",
    color="Venue",
    line_group="Venue",
    title='<b>Venues, Biggest Movers, 1970s (Melbourne):</b> Year',
    markers=True,
)
fig.show()

Which band played at the Bananas the most times?

# Three most frequent artists among rows whose venue name contains 'Bananas'.
at_bananas = venue_activity_complete['Venue'].str.contains('Bananas')
venue_activity_complete[at_bananas]['Artist'].value_counts().head(3)
The Boys Next Door    20
Rose Tattoo           12
Cold Chisel            7
Name: Artist, dtype: int64

Which band played at the Hearts Nightclub (Polaris Inn) the most times?

# Three most frequent artists among rows whose venue name contains 'Polaris Inn'
# (Hearts Nightclub).
at_polaris_inn = venue_activity_complete['Venue'].str.contains('Polaris Inn')
venue_activity_complete[at_polaris_inn]['Artist'].value_counts().head(3)
The Boys Next Door    17
Men at Work            7
The Jetsonnes          4
Name: Artist, dtype: int64

Which venue did The Boys Next Door / The Birthday Party play most at?

# Five most frequent venues for rows whose artist name contains either
# 'The Boys Next Door' or 'The Birthday Party'.
is_boys_next_door = venue_activity_complete['Artist'].str.contains('The Boys Next Door')
is_birthday_party = venue_activity_complete['Artist'].str.contains('The Birthday Party')
venue_activity_complete[(is_boys_next_door) | (is_birthday_party)]['Venue'].value_counts().head(5)
Crystal Ballroom, Melbourne, Australia                 42
Tiger Lounge, Royal Oak Hotel, Melbourne, Australia    39
Bananas, Melbourne, Australia                          20
Hearts Nightclub, Polaris Inn, Melbourne, Australia    19
Bombay Rock, Melbourne, Australia                      17
Name: Venue, dtype: int64

1980s

# Venues with at least one recorded event in any decade.
top_venues = all_events[(all_events.Freq >= 1)]['Venue'].unique()
# Venues with more than 50 events in the 1980s decade row.
active_in_1980s = all_events[(all_events.Decade == 1980) & (all_events.Freq > 50)]['Venue'].unique()
top_venues_80s = list(set(top_venues) & set(active_in_1980s))

# Decade-level counts for those venues, from the 1970s onwards.
top_in_state = all_events[all_events.Venue.isin(top_venues_80s)].sort_values(['Decade','Venue'])
top_in_state = top_in_state[top_in_state.Decade > 1969]

# Title suffix is 'Decades' (was 'Music'), matching the x axis and the
# equivalent plots for the other decades.
fig = px.line(top_in_state.sort_values('Decade'), 
              x="Decade", y="Freq", color="Venue",line_group="Venue",
              title= f'<b>Music venues active in 1980s (Melbourne):</b> Decades',markers=True)

fig.show()
# Alias used by the remaining cells; it refers to the same DataFrame object.
all_setlists = venue_activity_complete

# Events per venue per year for the venues selected in `active_in_1980s`.
venue_rows = all_setlists[all_setlists['Venue'].isin(active_in_1980s)]
yearly_counts = venue_rows[['Venue', 'Year']].value_counts().reset_index()
all_events_yr = yearly_counts.groupby(['Venue', 'Year']).sum().reset_index()
all_events_yr.columns = ['Venue', 'Year', 'Freq']
all_events_yr['Year'] = all_events_yr['Year'].astype(int)

# Keep 1976-1989 inclusive.
all_events_yr = all_events_yr[all_events_yr['Year'].between(1976, 1989)]

# One line per venue, years on the x axis.
fig = px.line(
    all_events_yr.sort_values('Year'),
    x="Year",
    y="Freq",
    color="Venue",
    line_group="Venue",
    title='<b>Music venues active in 1980s (Melbourne):</b> Year',
    markers=True,
)
fig.show()
# Every venue with a 1980s row in the decade table (no frequency threshold).
active_in_1980s = all_events[(all_events.Decade == 1980)]['Venue'].unique()

# Events per venue per year for those venues.
all_events_yr = all_setlists[all_setlists.Venue.isin(active_in_1980s)][['Venue','Year']]\
.value_counts().reset_index().groupby(['Venue','Year']).sum().reset_index()
all_events_yr.columns = ['Venue','Year','Freq']
all_events_yr.Year = all_events_yr.Year.astype(int)

# Keep 1980-1989 only, and only venues that reached more than 5 events in at
# least one year. The Freq > 5 test runs on the frame before the year filter,
# so the qualifying year may fall outside the decade.
all_events_yr = all_events_yr[(all_events_yr.Year > 1979) & (all_events_yr.Year < 1990) &
                              (all_events_yr.Venue.isin(all_events_yr[all_events_yr.Freq > 5]['Venue'].unique()))]

# Venue x Year table of event counts; missing combinations become 0.
piv80s = all_events_yr.pivot(index='Venue', columns='Year', values='Freq').fillna(0)
# Clipping at 1 and summing counts the years with any activity per venue;
# keep the venues that were active in more than one year.
clipped = piv80s.clip(upper=1).sum(axis=1).reset_index()
piv80s = piv80s[piv80s.index.isin(clipped[clipped[0] > 1].Venue)]

# Year-over-year relative change, one column per consecutive pair of years
# ('80-81' ... '88-89').
years_80s = list(range(1980, 1990))
change80s = piv80s.copy()
for prev_year, year in zip(years_80s, years_80s[1:]):
    label = f'{prev_year % 100:02d}-{year % 100:02d}'
    change80s[label] = change80s[[prev_year, year]].pct_change(axis=1)[year]

# Drop the raw counts and reshape to long form (columns: Year, Venue, 0).
change80s = change80s.drop(years_80s, axis=1).unstack().reset_index()
# Discard undefined or infinite changes (division by a zero count).
# The axis is passed by keyword: the positional form `.any(1)` was removed in pandas 2.0.
change80s = change80s[~change80s.isin([np.nan, np.inf, -np.inf]).any(axis=1)]
# Venues behind the five largest year-over-year increases.
biggestmovers80s = change80s.sort_values(0, ascending=False).head(5)['Venue'].unique()
print('Largest YoY fluctuations for venues in the 80s')
change80s.sort_values(0, ascending=False).head(5)
Largest YoY fluctuations for venues in the 80s
Year Venue 0
426 87-88 The Palace Complex, Melbourne, Australia 10.50
405 87-88 Metro Nightclub, Melbourne, Australia 5.00
262 84-85 The Central Club Hotel, Melbourne, Australia 2.25
462 88-89 Old Greek Theatre, Melbourne, Australia 2.00
376 86-87 Village Green Hotel, Melbourne, Australia 2.00
# change80s.sort_values(0, ascending=False).head(10).value_counts('Year')
# Events per venue per year for the 1980s biggest movers.
mover_rows = all_setlists[all_setlists['Venue'].isin(biggestmovers80s)]
yearly_counts = mover_rows[['Venue', 'Year']].value_counts().reset_index()
all_events_yr = yearly_counts.groupby(['Venue', 'Year']).sum().reset_index()
all_events_yr.columns = ['Venue', 'Year', 'Freq']
all_events_yr['Year'] = all_events_yr['Year'].astype(int)

# Keep 1981-1994 inclusive.
all_events_yr = all_events_yr[all_events_yr['Year'].between(1981, 1994)]

# One line per venue, years on the x axis.
fig = px.line(
    all_events_yr.sort_values('Year'),
    x="Year",
    y="Freq",
    color="Venue",
    line_group="Venue",
    title='<b>Venues, Biggest Movers, 1980s (Melbourne):</b> Year',
    markers=True,
)
fig.show()

1990s

# Venues with at least one recorded event in any decade.
top_venues = all_events.loc[all_events['Freq'] >= 1, 'Venue'].unique()
# Venues with more than 100 events in the 1990s decade row.
is_busy_1990s = (all_events['Decade'] == 1990) & (all_events['Freq'] > 100)
active_in_1990s = all_events.loc[is_busy_1990s, 'Venue'].unique()
top_venues_90s = list(set(top_venues) & set(active_in_1990s))

# Decade-level counts for those venues, from the 1980s onwards.
top_in_state = all_events[all_events['Venue'].isin(top_venues_90s)]
top_in_state = top_in_state.sort_values(['Decade', 'Venue'])
top_in_state = top_in_state[top_in_state['Decade'] > 1979]

# One line per venue, decades on the x axis.
fig = px.line(
    top_in_state.sort_values('Decade'),
    x="Decade",
    y="Freq",
    color="Venue",
    line_group="Venue",
    title='<b>Music venues active in 1990s (Melbourne):</b> Decades',
    markers=True,
)

fig.show()
# Events per venue per year for the venues selected in `active_in_1990s`.
venue_rows = all_setlists[all_setlists['Venue'].isin(active_in_1990s)]
yearly_counts = venue_rows[['Venue', 'Year']].value_counts().reset_index()
all_events_yr = yearly_counts.groupby(['Venue', 'Year']).sum().reset_index()
all_events_yr.columns = ['Venue', 'Year', 'Freq']
all_events_yr['Year'] = all_events_yr['Year'].astype(int)

# Keep 1986-1999 inclusive.
all_events_yr = all_events_yr[all_events_yr['Year'].between(1986, 1999)]

# One line per venue, years on the x axis.
fig = px.line(
    all_events_yr.sort_values('Year'),
    x="Year",
    y="Freq",
    color="Venue",
    line_group="Venue",
    title='<b>Music venues active in 1990s (Melbourne):</b> Year',
    markers=True,
)
fig.show()
# Every venue with a 1990s row in the decade table (no frequency threshold).
active_in_1990s = all_events[(all_events.Decade == 1990)]['Venue'].unique()

# Events per venue per year for those venues.
all_events_yr = all_setlists[all_setlists.Venue.isin(active_in_1990s)][['Venue','Year']]\
.value_counts().reset_index().groupby(['Venue','Year']).sum().reset_index()
all_events_yr.columns = ['Venue','Year','Freq']
all_events_yr.Year = all_events_yr.Year.astype(int)

# Keep 1990-1999 only, and only venues that reached more than 5 events in at
# least one year. The Freq > 5 test runs on the frame before the year filter,
# so the qualifying year may fall outside the decade.
all_events_yr = all_events_yr[(all_events_yr.Year > 1989) & (all_events_yr.Year < 2000) &
                              (all_events_yr.Venue.isin(all_events_yr[all_events_yr.Freq > 5]['Venue'].unique()))]

# Venue x Year table of event counts; missing combinations become 0.
piv90s = all_events_yr.pivot(index='Venue', columns='Year', values='Freq').fillna(0)
# Clipping at 1 and summing counts the years with any activity per venue;
# keep the venues that were active in more than one year.
clipped = piv90s.clip(upper=1).sum(axis=1).reset_index()
piv90s = piv90s[piv90s.index.isin(clipped[clipped[0] > 1].Venue)]

# Year-over-year relative change, one column per consecutive pair of years
# ('90-91' ... '98-99').
years_90s = list(range(1990, 2000))
change90s = piv90s.copy()
for prev_year, year in zip(years_90s, years_90s[1:]):
    label = f'{prev_year % 100:02d}-{year % 100:02d}'
    change90s[label] = change90s[[prev_year, year]].pct_change(axis=1)[year]

# Drop the raw counts and reshape to long form (columns: Year, Venue, 0).
change90s = change90s.drop(years_90s, axis=1).unstack().reset_index()
# Discard undefined or infinite changes (division by a zero count).
# The axis is passed by keyword: the positional form `.any(1)` was removed in pandas 2.0.
change90s = change90s[~change90s.isin([np.nan, np.inf, -np.inf]).any(axis=1)]
# Venues behind the five largest year-over-year increases.
biggestmovers90s = change90s.sort_values(0, ascending=False).head(5)['Venue'].unique()
# Heading typo fixed ("fot" -> "for").
print('Largest YoY fluctuations for venues in the 90s')
change90s.sort_values(0, ascending=False).head(5)
Largest YoY fluctuations for venues in the 90s
Year Venue 0
360 96-97 Prince Bandroom, Melbourne, Australia 10.00
281 95-96 Corner Hotel, Melbourne, Australia 8.25
428 97-98 The Central Club Hotel, Melbourne, Australia 7.00
170 93-94 Continental Cafe, Melbourne, Australia 6.00
156 92-93 The Esplanade Hotel, Melbourne, Australia 5.00
# change90s.sort_values(0, ascending=False).head(10).value_counts('Year')
# Events per venue per year for the 1990s biggest movers.
mover_rows = all_setlists[all_setlists['Venue'].isin(biggestmovers90s)]
yearly_counts = mover_rows[['Venue', 'Year']].value_counts().reset_index()
all_events_yr = yearly_counts.groupby(['Venue', 'Year']).sum().reset_index()
all_events_yr.columns = ['Venue', 'Year', 'Freq']
all_events_yr['Year'] = all_events_yr['Year'].astype(int)

# Keep 1991-1999 inclusive.
all_events_yr = all_events_yr[all_events_yr['Year'].between(1991, 1999)]

# One line per venue, years on the x axis.
fig = px.line(
    all_events_yr.sort_values('Year'),
    x="Year",
    y="Freq",
    color="Venue",
    line_group="Venue",
    title='<b>Venues, Biggest Movers, 1990s (Melbourne):</b> Year',
    markers=True,
)
fig.show()

2000s

# Venues with at least one recorded event in any decade.
top_venues = all_events.loc[all_events['Freq'] >= 1, 'Venue'].unique()
# Venues with more than 150 events in the 2000s decade row.
is_busy_2000s = (all_events['Decade'] == 2000) & (all_events['Freq'] > 150)
active_in_2000s = all_events.loc[is_busy_2000s, 'Venue'].unique()
top_venues_00s = list(set(top_venues) & set(active_in_2000s))

# Decade-level counts for those venues, from the 1990s onwards.
top_in_state = all_events[all_events['Venue'].isin(top_venues_00s)]
top_in_state = top_in_state.sort_values(['Decade', 'Venue'])
top_in_state = top_in_state[top_in_state['Decade'] > 1989]

# One line per venue, decades on the x axis.
fig = px.line(
    top_in_state.sort_values('Decade'),
    x="Decade",
    y="Freq",
    color="Venue",
    line_group="Venue",
    title='<b>Top music venues active in 2000s (Melbourne):</b> Decades',
    markers=True,
)

fig.show()
# Events per venue per year for the venues selected in `active_in_2000s`.
venue_rows = all_setlists[all_setlists['Venue'].isin(active_in_2000s)]
yearly_counts = venue_rows[['Venue', 'Year']].value_counts().reset_index()
all_events_yr = yearly_counts.groupby(['Venue', 'Year']).sum().reset_index()
all_events_yr.columns = ['Venue', 'Year', 'Freq']
all_events_yr['Year'] = all_events_yr['Year'].astype(int)

# Keep 1996-2009 inclusive.
all_events_yr = all_events_yr[all_events_yr['Year'].between(1996, 2009)]

# One line per venue, years on the x axis.
fig = px.line(
    all_events_yr.sort_values('Year'),
    x="Year",
    y="Freq",
    color="Venue",
    line_group="Venue",
    title='<b>Music venues active in 2000s (Melbourne):</b> Year',
    markers=True,
)
fig.show()
# Every venue with a 2000s row in the decade table (no frequency threshold).
active_in_2000s = all_events[(all_events.Decade == 2000)]['Venue'].unique()

# Events per venue per year for those venues.
all_events_yr = all_setlists[all_setlists.Venue.isin(active_in_2000s)][['Venue','Year']]\
.value_counts().reset_index().groupby(['Venue','Year']).sum().reset_index()
all_events_yr.columns = ['Venue','Year','Freq']
all_events_yr.Year = all_events_yr.Year.astype(int)

# Keep 2000-2009 only, and only venues that reached more than 5 events in at
# least one year. The Freq > 5 test runs on the frame before the year filter,
# so the qualifying year may fall outside the decade.
all_events_yr = all_events_yr[(all_events_yr.Year > 1999) & (all_events_yr.Year < 2010) &
                              (all_events_yr.Venue.isin(all_events_yr[all_events_yr.Freq > 5]['Venue'].unique()))]

# Venue x Year table of event counts; missing combinations become 0.
piv00s = all_events_yr.pivot(index='Venue', columns='Year', values='Freq').fillna(0)
# Clipping at 1 and summing counts the years with any activity per venue;
# keep the venues that were active in more than one year.
clipped = piv00s.clip(upper=1).sum(axis=1).reset_index()
piv00s = piv00s[piv00s.index.isin(clipped[clipped[0] > 1].Venue)]

# Year-over-year relative change, one column per consecutive pair of years
# ('00-01' ... '08-09').
years_00s = list(range(2000, 2010))
change00s = piv00s.copy()
for prev_year, year in zip(years_00s, years_00s[1:]):
    label = f'{prev_year % 100:02d}-{year % 100:02d}'
    change00s[label] = change00s[[prev_year, year]].pct_change(axis=1)[year]

# Drop the raw counts and reshape to long form (columns: Year, Venue, 0).
change00s = change00s.drop(years_00s, axis=1).unstack().reset_index()
# Discard undefined or infinite changes (division by a zero count).
# The axis is passed by keyword: the positional form `.any(1)` was removed in pandas 2.0.
change00s = change00s[~change00s.isin([np.nan, np.inf, -np.inf]).any(axis=1)]
# Venues behind the five largest year-over-year increases.
biggestmovers00s = change00s.sort_values(0, ascending=False).head(5)['Venue'].unique()
print('Largest YoY fluctuations for venues in the 00s')
change00s.sort_values(0, ascending=False).head(5)
Largest YoY fluctuations for venues in the 00s
Year Venue 0
648 07-08 Palace Theatre, Melbourne, Australia 28.0
777 08-09 Thornbury Theatre, Melbourne, Australia 11.0
652 07-08 Pier Hotel, Melbourne, Australia 7.0
151 01-02 The Empress Hotel, Melbourne, Australia 6.0
732 08-09 Next, Melbourne, Australia 6.0
# change00s.sort_values(0, ascending=False).head(10).value_counts('Year')
# Events per venue per year for the 2000s biggest movers.
mover_rows = all_setlists[all_setlists['Venue'].isin(biggestmovers00s)]
yearly_counts = mover_rows[['Venue', 'Year']].value_counts().reset_index()
all_events_yr = yearly_counts.groupby(['Venue', 'Year']).sum().reset_index()
all_events_yr.columns = ['Venue', 'Year', 'Freq']
all_events_yr['Year'] = all_events_yr['Year'].astype(int)

# Keep 1996-2009 inclusive.
all_events_yr = all_events_yr[all_events_yr['Year'].between(1996, 2009)]

# One line per venue, years on the x axis.
fig = px.line(
    all_events_yr.sort_values('Year'),
    x="Year",
    y="Freq",
    color="Venue",
    line_group="Venue",
    title='<b>Venues, Biggest Movers, 2000s (Melbourne):</b> Year',
    markers=True,
)
fig.show()

2010s

# Venues with at least one recorded event in any decade.
top_venues = all_events[(all_events.Freq >= 1)]['Venue'].unique()
# Venues with more than 250 events in the 2010s decade row.
active_in_2010s = all_events[(all_events.Decade == 2010) & (all_events.Freq > 250)]['Venue'].unique()
top_venues_10s = list(set(top_venues) & set(active_in_2010s))

# Decade-level counts for the 2010s selection, from the 2000s onwards.
# This previously filtered on `top_venues_00s`, a copy-paste slip that
# plotted the 2000s venue list under a 2010s title.
top_in_state = all_events[all_events.Venue.isin(top_venues_10s)].sort_values(['Decade','Venue'])
top_in_state = top_in_state[top_in_state.Decade > 1999]

fig = px.line(top_in_state.sort_values('Decade'), 
              x="Decade", y="Freq", color="Venue",line_group="Venue",
              title= f'<b>Top music venues active in 2010s (Melbourne):</b> Decades',markers=True)

fig.show()
# Events per venue per year for the venues selected in `active_in_2010s`.
# This previously used `active_in_2000s`, a copy-paste slip from the 2000s section.
all_events_yr = all_setlists[all_setlists.Venue.isin(active_in_2010s)][['Venue','Year']]\
.value_counts().reset_index().groupby(['Venue','Year']).sum().reset_index()
all_events_yr.columns = ['Venue','Year','Freq']
all_events_yr.Year = all_events_yr.Year.astype(int)
# Keep 2006-2019, following the same window as the other decade sections
# (five years of lead-in plus the full decade). The upper bound was 2015,
# which cut off the second half of the decade.
all_events_yr = all_events_yr[(all_events_yr.Year > 2005) & (all_events_yr.Year < 2020)]

fig = px.line(all_events_yr.sort_values('Year'), 
              x="Year", y="Freq", color="Venue",line_group="Venue",
              title= f'<b>Music venues active in 2010s (Melbourne):</b> Year',markers=True)
fig.show()
# Every venue with a 2010s row in the decade table (no frequency threshold).
active_in_2010s = all_events[(all_events.Decade == 2010)]['Venue'].unique()

# Events per venue per year for those venues.
all_events_yr = all_setlists[all_setlists.Venue.isin(active_in_2010s)][['Venue','Year']]\
.value_counts().reset_index().groupby(['Venue','Year']).sum().reset_index()
all_events_yr.columns = ['Venue','Year','Freq']
all_events_yr.Year = all_events_yr.Year.astype(int)

# Keep 2010-2019 only, and only venues that reached more than 5 events in at
# least one year. The Freq > 5 test runs on the frame before the year filter,
# so the qualifying year may fall outside the decade.
all_events_yr = all_events_yr[(all_events_yr.Year > 2009) & (all_events_yr.Year < 2020) &
                              (all_events_yr.Venue.isin(all_events_yr[all_events_yr.Freq > 5]['Venue'].unique()))]

# Venue x Year table of event counts; missing combinations become 0.
piv10s = all_events_yr.pivot(index='Venue', columns='Year', values='Freq').fillna(0)
# Clipping at 1 and summing counts the years with any activity per venue;
# keep the venues that were active in more than one year.
clipped = piv10s.clip(upper=1).sum(axis=1).reset_index()
piv10s = piv10s[piv10s.index.isin(clipped[clipped[0] > 1].Venue)]

# Year-over-year relative change, one column per consecutive pair of years
# ('10-11' ... '18-19').
years_10s = list(range(2010, 2020))
change10s = piv10s.copy()
for prev_year, year in zip(years_10s, years_10s[1:]):
    label = f'{prev_year % 100:02d}-{year % 100:02d}'
    change10s[label] = change10s[[prev_year, year]].pct_change(axis=1)[year]

# Drop the raw counts and reshape to long form (columns: Year, Venue, 0).
change10s = change10s.drop(years_10s, axis=1).unstack().reset_index()
# Discard undefined or infinite changes (division by a zero count).
# The axis is passed by keyword: the positional form `.any(1)` was removed in pandas 2.0.
change10s = change10s[~change10s.isin([np.nan, np.inf, -np.inf]).any(axis=1)]
# Venues behind the five largest year-over-year increases.
biggestmovers10s = change10s.sort_values(0, ascending=False).head(5)['Venue'].unique()
print('Largest YoY fluctuations for venues in the 2010s')
change10s.sort_values(0, ascending=False).head(5)
Largest YoY fluctuations for venues in the 2010s
Year Venue 0
909 18-19 Stay Gold, Melbourne, Australia 27.0
247 12-13 Melbourne Town Hall, Melbourne, Australia 23.0
299 12-13 The Reverence Hotel, Melbourne, Australia 15.0
865 18-19 Gershwin Room, The Esplanade Hotel, Melbourne,... 14.0
344 13-14 Howler, Melbourne, Australia 12.5
# change10s.sort_values(0, ascending=False).head(10).value_counts('Year')
# Events per venue per year for the 2010s biggest movers.
all_events_yr = all_setlists[all_setlists.Venue.isin(biggestmovers10s)][['Venue','Year']]\
.value_counts().reset_index().groupby(['Venue','Year']).sum().reset_index()
all_events_yr.columns = ['Venue','Year','Freq']
all_events_yr.Year = all_events_yr.Year.astype(int)
# Keep 2006-2019. The upper bound was 2015, which hid the second half of
# the decade even though the movers are ranked on changes up to 2018-2019.
all_events_yr = all_events_yr[(all_events_yr.Year > 2005) & (all_events_yr.Year < 2020)]

fig = px.line(all_events_yr.sort_values('Year'), 
              x="Year", y="Freq", color="Venue",line_group="Venue",
              title= f'<b>Venues, Biggest Movers, 2010s (Melbourne):</b> Year',markers=True)
fig.show()

Genre

_images/Analysis_TLC_Workshop_60_0.png _images/Analysis_TLC_Workshop_60_1.png

Genre Map and Network analysis

Explore cultural hotspots over time using this tool.

Spotify acoustic attributes

# Mean of every numeric column per (Venue, Year). numeric_only=True is
# required on pandas >= 2.0, where averaging non-numeric columns raises
# instead of silently dropping them. Not used by the visible code below.
spotify_group_by = venue_activity_complete.groupby(['Venue','Year']).mean(numeric_only=True).reset_index()

# Sort once, outside the loop, and take hover labels from the SAME sorted
# frame so each point is labelled with its own row index.
# NOTE(review): this assumes plotly matches array-like arguments to rows by
# position; under that assumption the unsorted index mislabelled points.
activity_by_date = venue_activity_complete.sort_values('Date')

# One animated density map (one frame per Year) for each acoustic attribute.
for attr in ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness',
          'liveness', 'valence', 'tempo']:
    fig = px.density_mapbox(activity_by_date, lat="new_lats", lon="new_longs",
                            z=attr,title=attr,
                            radius=25,opacity=0.6,hover_name=activity_by_date.index,
                            height=800,width=800,color_continuous_scale='inferno', zoom=11,
                           animation_frame = 'Year', center=dict(lat=-37.816244, lon=144.957198))

    fig.update_layout(hovermode='closest')
    fig.show()